if (!require("dplyr")) install.packages("dplyr")
## Loading required package: dplyr
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
if (!require("skimr")) install.packages("skimr")
## Loading required package: skimr
if (!require("tidyr")) install.packages("tidyr")
## Loading required package: tidyr
if (!require("survival")) install.packages("survival")
## Loading required package: survival
if (!require("survminer")) install.packages("survminer")
## Loading required package: survminer
## Loading required package: ggplot2
## Loading required package: ggpubr
##
## Attaching package: 'survminer'
## The following object is masked from 'package:survival':
##
## myeloma
if (!require("haven")) install.packages("haven")
## Loading required package: haven
if (!require("broom")) install.packages("broom")
## Loading required package: broom
if (!require("rms")) install.packages("rms")
## Loading required package: rms
## Loading required package: Hmisc
##
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:dplyr':
##
## src, summarize
## The following objects are masked from 'package:base':
##
## format.pval, units
if (!require("tidyverse")) install.packages("tidyverse")
## Loading required package: tidyverse
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0 ✔ readr 2.1.5
## ✔ lubridate 1.9.4 ✔ stringr 1.5.1
## ✔ purrr 1.0.4 ✔ tibble 3.2.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ✖ Hmisc::src() masks dplyr::src()
## ✖ Hmisc::summarize() masks dplyr::summarize()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
if (!require("tableone")) install.packages("tableone")
## Loading required package: tableone
library(dplyr)
library(skimr)
library(tidyr)
library(survival)
library(survminer)
library(haven)
library(broom)
library(rms)
library(tidyverse)
library(tableone)
NHANES2 <- read.csv("NHANES2-1 (1).csv")
d <- NHANES2 #%>%
#select('ROWNAMES','SEX','RACE','MARRY','DEATH','AGEYRS',
#'GRADES','WT', 'BOOZE', 'SIZE',
#'AVGSMK', "HEIGHT", "EXAM_YR", "DIE_YR", "LAST_YR")
#Exclude missing death
d <- d %>%
filter(!is.na(BOOZE), !is.na(DEATH), !is.na(SEX), !is.na(RACE), !is.na(GRADES), !is.na(MARRY), !is.na(AVGSMK), !is.na(SIZE), !is.na(GRADES))
#BMI
d <- d %>%
mutate(BMI = WT / (HEIGHT / 100)^2)
head(d$BMI)
## [1] 20.49522 21.02151 23.22748 35.72785 27.92312 30.50132
# GRADES and SIZE categories
d$EDUC_CAT <- cut(d$GRADES,
breaks = c(-Inf, 8, 11, 12, 15, Inf),
labels = c("≤8 yrs", "Some HS", "HS Grad", "Some College", "College+"),
right = TRUE)
d$SIZE_CAT <- cut(d$SIZE,
breaks = c(0, 3, 5, 7, 8),
labels = c("Rural", "Small town", "Medium city", "Large city"),
right = TRUE)
# Catergorical BOOZE
d <- d %>%
mutate(BOOZE_q = cut(
BOOZE,
breaks = c(-1, 0, 0.5, 2.0, 77.0),
include.lowest = TRUE,
labels = c("0/week", "0–0.5/week", "0.5–2/week", ">2/week")
))
vars <- c("AGEYRS", "SEX", "RACE", "MARRY", "BMI", "AVGSMK", "EDUC_CAT", "SIZE_CAT")
catVars <- c("SEX", "RACE", "MARRY")
#Table 1
table1 <- CreateTableOne(vars = vars,
data = d,
strata = "BOOZE_q",
factorVars = catVars)
print(table1, showAllLevels = TRUE)
## Stratified by BOOZE_q
## level 0/week 0–0.5/week 0.5–2/week
## n 4053 941 1729
## AGEYRS (mean (SD)) 57.09 (12.79) 54.34 (13.36) 51.60 (13.53)
## SEX (%) 1 1448 (35.7) 367 (39.0) 856 (49.5)
## 2 2605 (64.3) 574 (61.0) 873 (50.5)
## RACE (%) 1 3497 (86.3) 827 (87.9) 1515 (87.6)
## 2 475 (11.7) 93 ( 9.9) 194 (11.2)
## 3 81 ( 2.0) 21 ( 2.2) 20 ( 1.2)
## MARRY (%) 2 2885 (71.2) 683 (72.6) 1288 (74.5)
## 3 671 (16.6) 127 (13.5) 172 ( 9.9)
## 4 190 ( 4.7) 67 ( 7.1) 102 ( 5.9)
## 5 96 ( 2.4) 20 ( 2.1) 59 ( 3.4)
## 6 202 ( 5.0) 40 ( 4.3) 103 ( 6.0)
## 8 9 ( 0.2) 4 ( 0.4) 5 ( 0.3)
## BMI (mean (SD)) 26.55 (5.50) 26.42 (5.10) 26.03 (4.84)
## AVGSMK (mean (SD)) 4.82 (10.84) 6.72 (12.63) 8.26 (13.77)
## EDUC_CAT (%) ≤8 yrs 1452 (35.8) 217 (23.1) 337 (19.5)
## Some HS 753 (18.6) 185 (19.7) 282 (16.3)
## HS Grad 1209 (29.8) 311 (33.0) 636 (36.8)
## Some College 353 ( 8.7) 130 (13.8) 235 (13.6)
## College+ 286 ( 7.1) 98 (10.4) 239 (13.8)
## SIZE_CAT (%) Rural 1101 (27.2) 348 (37.0) 758 (43.8)
## Small town 454 (11.2) 123 (13.1) 244 (14.1)
## Medium city 569 (14.0) 118 (12.5) 205 (11.9)
## Large city 1929 (47.6) 352 (37.4) 522 (30.2)
## Stratified by BOOZE_q
## >2/week p test
## n 2527
## AGEYRS (mean (SD)) 51.71 (13.18) <0.001
## SEX (%) 1678 (66.4) <0.001
## 849 (33.6)
## RACE (%) 2250 (89.0) 0.010
## 236 ( 9.3)
## 41 ( 1.6)
## MARRY (%) 1972 (78.0) <0.001
## 170 ( 6.7)
## 168 ( 6.6)
## 70 ( 2.8)
## 140 ( 5.5)
## 7 ( 0.3)
## BMI (mean (SD)) 25.20 (4.08) <0.001
## AVGSMK (mean (SD)) 9.60 (13.92) <0.001
## EDUC_CAT (%) 387 (15.3) <0.001
## 354 (14.0)
## 876 (34.7)
## 411 (16.3)
## 499 (19.7)
## SIZE_CAT (%) 1239 (49.0) <0.001
## 346 (13.7)
## 261 (10.3)
## 681 (26.9)
#Create follow-up time
d$start <- d$EXAM_YR + d$EXAM_MO / 12
d$end <- ifelse(d$DEATH == 1,
d$DIE_YR + d$DIE_MO / 12,
d$LAST_YR + d$LAST_MO / 12)
d$FU <- d$end - d$start
#Adjusted Cox regression with SEX
cox <- coxph(Surv(FU, DEATH) ~ as.factor(BOOZE_q) + SEX +
as.factor(RACE) + as.factor(GRADES) + as.factor(MARRY) + BMI +
AVGSMK + SIZE, data = d, ties='efron')
summary(cox)
## Call:
## coxph(formula = Surv(FU, DEATH) ~ as.factor(BOOZE_q) + SEX +
## as.factor(RACE) + as.factor(GRADES) + as.factor(MARRY) +
## BMI + AVGSMK + SIZE, data = d, ties = "efron")
##
## n= 9250, number of events= 2145
##
## coef exp(coef) se(coef) z Pr(>|z|)
## as.factor(BOOZE_q)0–0.5/week -0.131786 0.876529 0.075867 -1.737 0.082376 .
## as.factor(BOOZE_q)0.5–2/week -0.390542 0.676690 0.065568 -5.956 2.58e-09 ***
## as.factor(BOOZE_q)>2/week -0.333146 0.716665 0.058474 -5.697 1.22e-08 ***
## SEX -0.773141 0.461561 0.049172 -15.723 < 2e-16 ***
## as.factor(RACE)2 -0.265985 0.766450 0.075590 -3.519 0.000434 ***
## as.factor(RACE)3 -0.513794 0.598222 0.198879 -2.583 0.009782 **
## as.factor(GRADES)1 0.599987 1.822095 0.314018 1.911 0.056046 .
## as.factor(GRADES)2 0.520093 1.682184 0.295838 1.758 0.078742 .
## as.factor(GRADES)3 0.701697 2.017172 0.252651 2.777 0.005481 **
## as.factor(GRADES)4 0.668808 1.951909 0.244717 2.733 0.006276 **
## as.factor(GRADES)5 0.707530 2.028973 0.250421 2.825 0.004723 **
## as.factor(GRADES)6 0.344214 1.410881 0.236150 1.458 0.144949
## as.factor(GRADES)7 0.391113 1.478625 0.237484 1.647 0.099578 .
## as.factor(GRADES)8 0.524292 1.689263 0.220388 2.379 0.017362 *
## as.factor(GRADES)9 0.393780 1.482575 0.230085 1.711 0.086997 .
## as.factor(GRADES)10 0.177886 1.194689 0.228746 0.778 0.436771
## as.factor(GRADES)11 0.214700 1.239490 0.235460 0.912 0.361857
## as.factor(GRADES)12 -0.033569 0.966988 0.218656 -0.154 0.877984
## as.factor(GRADES)13 -0.132898 0.875555 0.243908 -0.545 0.585843
## as.factor(GRADES)14 -0.214628 0.806841 0.241954 -0.887 0.375046
## as.factor(GRADES)15 -0.177631 0.837251 0.272086 -0.653 0.513853
## as.factor(GRADES)16 -0.170102 0.843579 0.237427 -0.716 0.473721
## as.factor(GRADES)17 -0.615065 0.540606 0.248169 -2.478 0.013197 *
## as.factor(MARRY)3 0.708942 2.031840 0.062116 11.413 < 2e-16 ***
## as.factor(MARRY)4 0.065615 1.067815 0.101914 0.644 0.519686
## as.factor(MARRY)5 0.030171 1.030631 0.144768 0.208 0.834910
## as.factor(MARRY)6 0.156516 1.169430 0.096475 1.622 0.104728
## as.factor(MARRY)8 0.746167 2.108901 0.336343 2.218 0.026523 *
## BMI -0.013787 0.986307 0.004682 -2.945 0.003232 **
## AVGSMK 0.004971 1.004984 0.001629 3.051 0.002279 **
## SIZE -0.022890 0.977370 0.008506 -2.691 0.007120 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## as.factor(BOOZE_q)0–0.5/week 0.8765 1.1409 0.7554 1.0171
## as.factor(BOOZE_q)0.5–2/week 0.6767 1.4778 0.5951 0.7695
## as.factor(BOOZE_q)>2/week 0.7167 1.3954 0.6391 0.8037
## SEX 0.4616 2.1666 0.4192 0.5083
## as.factor(RACE)2 0.7665 1.3047 0.6609 0.8888
## as.factor(RACE)3 0.5982 1.6716 0.4051 0.8834
## as.factor(GRADES)1 1.8221 0.5488 0.9846 3.3718
## as.factor(GRADES)2 1.6822 0.5945 0.9420 3.0039
## as.factor(GRADES)3 2.0172 0.4957 1.2294 3.3098
## as.factor(GRADES)4 1.9519 0.5123 1.2082 3.1533
## as.factor(GRADES)5 2.0290 0.4929 1.2420 3.3146
## as.factor(GRADES)6 1.4109 0.7088 0.8881 2.2413
## as.factor(GRADES)7 1.4786 0.6763 0.9283 2.3551
## as.factor(GRADES)8 1.6893 0.5920 1.0967 2.6019
## as.factor(GRADES)9 1.4826 0.6745 0.9444 2.3274
## as.factor(GRADES)10 1.1947 0.8370 0.7630 1.8705
## as.factor(GRADES)11 1.2395 0.8068 0.7813 1.9664
## as.factor(GRADES)12 0.9670 1.0341 0.6299 1.4844
## as.factor(GRADES)13 0.8756 1.1421 0.5428 1.4122
## as.factor(GRADES)14 0.8068 1.2394 0.5022 1.2964
## as.factor(GRADES)15 0.8373 1.1944 0.4912 1.4271
## as.factor(GRADES)16 0.8436 1.1854 0.5297 1.3435
## as.factor(GRADES)17 0.5406 1.8498 0.3324 0.8793
## as.factor(MARRY)3 2.0318 0.4922 1.7989 2.2949
## as.factor(MARRY)4 1.0678 0.9365 0.8745 1.3039
## as.factor(MARRY)5 1.0306 0.9703 0.7760 1.3688
## as.factor(MARRY)6 1.1694 0.8551 0.9680 1.4128
## as.factor(MARRY)8 2.1089 0.4742 1.0908 4.0771
## BMI 0.9863 1.0139 0.9773 0.9954
## AVGSMK 1.0050 0.9950 1.0018 1.0082
## SIZE 0.9774 1.0232 0.9612 0.9938
##
## Concordance= 0.661 (se = 0.006 )
## Likelihood ratio test= 651 on 31 df, p=<2e-16
## Wald test = 649.1 on 31 df, p=<2e-16
## Score (logrank) test = 673.9 on 31 df, p=<2e-16
cox.zph(cox)
## chisq df p
## as.factor(BOOZE_q) 7.813 3 0.05004
## SEX 11.185 1 0.00082
## as.factor(RACE) 1.933 2 0.38039
## as.factor(GRADES) 20.679 17 0.24099
## as.factor(MARRY) 4.452 5 0.48634
## BMI 0.173 1 0.67753
## AVGSMK 0.458 1 0.49844
## SIZE 0.264 1 0.60707
## GLOBAL 48.114 31 0.02566
plot(cox.zph(cox))








#Product term with SEX Cox regression
cox_product <- coxph(Surv(FU, DEATH) ~ as.factor(BOOZE_q)*SEX +
as.factor(RACE) + as.factor(GRADES) + as.factor(MARRY) +
BMI + AVGSMK + SIZE, data = d, ties = "efron")
summary(cox_product)
## Call:
## coxph(formula = Surv(FU, DEATH) ~ as.factor(BOOZE_q) * SEX +
## as.factor(RACE) + as.factor(GRADES) + as.factor(MARRY) +
## BMI + AVGSMK + SIZE, data = d, ties = "efron")
##
## n= 9250, number of events= 2145
##
## coef exp(coef) se(coef) z Pr(>|z|)
## as.factor(BOOZE_q)0–0.5/week -0.161405 0.850947 0.236705 -0.682 0.495313
## as.factor(BOOZE_q)0.5–2/week -0.453902 0.635145 0.194085 -2.339 0.019352
## as.factor(BOOZE_q)>2/week -0.567586 0.566892 0.166506 -3.409 0.000653
## SEX -0.817512 0.441529 0.064487 -12.677 < 2e-16
## as.factor(RACE)2 -0.266209 0.766279 0.075599 -3.521 0.000429
## as.factor(RACE)3 -0.511089 0.599842 0.198899 -2.570 0.010182
## as.factor(GRADES)1 0.597777 1.818073 0.314051 1.903 0.056983
## as.factor(GRADES)2 0.516572 1.676272 0.295965 1.745 0.080919
## as.factor(GRADES)3 0.705668 2.025200 0.252666 2.793 0.005224
## as.factor(GRADES)4 0.674080 1.962227 0.244765 2.754 0.005887
## as.factor(GRADES)5 0.712374 2.038825 0.250470 2.844 0.004453
## as.factor(GRADES)6 0.347922 1.416121 0.236162 1.473 0.140688
## as.factor(GRADES)7 0.394282 1.483318 0.237521 1.660 0.096917
## as.factor(GRADES)8 0.527903 1.695374 0.220403 2.395 0.016612
## as.factor(GRADES)9 0.394902 1.484239 0.230093 1.716 0.086112
## as.factor(GRADES)10 0.181665 1.199212 0.228753 0.794 0.427108
## as.factor(GRADES)11 0.217739 1.243262 0.235479 0.925 0.355141
## as.factor(GRADES)12 -0.032482 0.968040 0.218671 -0.149 0.881915
## as.factor(GRADES)13 -0.134216 0.874401 0.243941 -0.550 0.582183
## as.factor(GRADES)14 -0.216948 0.804972 0.241980 -0.897 0.369958
## as.factor(GRADES)15 -0.178067 0.836886 0.272107 -0.654 0.512854
## as.factor(GRADES)16 -0.168185 0.845198 0.237443 -0.708 0.478749
## as.factor(GRADES)17 -0.613072 0.541684 0.248185 -2.470 0.013503
## as.factor(MARRY)3 0.713826 2.041788 0.062203 11.476 < 2e-16
## as.factor(MARRY)4 0.063219 1.065260 0.101959 0.620 0.535231
## as.factor(MARRY)5 0.032512 1.033046 0.144813 0.225 0.822363
## as.factor(MARRY)6 0.158054 1.171229 0.096484 1.638 0.101392
## as.factor(MARRY)8 0.747464 2.111638 0.336519 2.221 0.026340
## BMI -0.013141 0.986945 0.004706 -2.793 0.005227
## AVGSMK 0.004964 1.004976 0.001629 3.046 0.002316
## SIZE -0.022786 0.977472 0.008512 -2.677 0.007428
## as.factor(BOOZE_q)0–0.5/week:SEX 0.019546 1.019738 0.150620 0.130 0.896748
## as.factor(BOOZE_q)0.5–2/week:SEX 0.042411 1.043323 0.131314 0.323 0.746717
## as.factor(BOOZE_q)>2/week:SEX 0.183497 1.201412 0.120545 1.522 0.127949
##
## as.factor(BOOZE_q)0–0.5/week
## as.factor(BOOZE_q)0.5–2/week *
## as.factor(BOOZE_q)>2/week ***
## SEX ***
## as.factor(RACE)2 ***
## as.factor(RACE)3 *
## as.factor(GRADES)1 .
## as.factor(GRADES)2 .
## as.factor(GRADES)3 **
## as.factor(GRADES)4 **
## as.factor(GRADES)5 **
## as.factor(GRADES)6
## as.factor(GRADES)7 .
## as.factor(GRADES)8 *
## as.factor(GRADES)9 .
## as.factor(GRADES)10
## as.factor(GRADES)11
## as.factor(GRADES)12
## as.factor(GRADES)13
## as.factor(GRADES)14
## as.factor(GRADES)15
## as.factor(GRADES)16
## as.factor(GRADES)17 *
## as.factor(MARRY)3 ***
## as.factor(MARRY)4
## as.factor(MARRY)5
## as.factor(MARRY)6
## as.factor(MARRY)8 *
## BMI **
## AVGSMK **
## SIZE **
## as.factor(BOOZE_q)0–0.5/week:SEX
## as.factor(BOOZE_q)0.5–2/week:SEX
## as.factor(BOOZE_q)>2/week:SEX
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## as.factor(BOOZE_q)0–0.5/week 0.8509 1.1752 0.5351 1.3533
## as.factor(BOOZE_q)0.5–2/week 0.6351 1.5744 0.4342 0.9291
## as.factor(BOOZE_q)>2/week 0.5669 1.7640 0.4090 0.7857
## SEX 0.4415 2.2649 0.3891 0.5010
## as.factor(RACE)2 0.7663 1.3050 0.6607 0.8887
## as.factor(RACE)3 0.5998 1.6671 0.4062 0.8858
## as.factor(GRADES)1 1.8181 0.5500 0.9824 3.3646
## as.factor(GRADES)2 1.6763 0.5966 0.9385 2.9941
## as.factor(GRADES)3 2.0252 0.4938 1.2342 3.3231
## as.factor(GRADES)4 1.9622 0.5096 1.2145 3.1703
## as.factor(GRADES)5 2.0388 0.4905 1.2479 3.3310
## as.factor(GRADES)6 1.4161 0.7062 0.8914 2.2497
## as.factor(GRADES)7 1.4833 0.6742 0.9312 2.3627
## as.factor(GRADES)8 1.6954 0.5898 1.1007 2.6114
## as.factor(GRADES)9 1.4842 0.6737 0.9455 2.3300
## as.factor(GRADES)10 1.1992 0.8339 0.7659 1.8776
## as.factor(GRADES)11 1.2433 0.8043 0.7837 1.9724
## as.factor(GRADES)12 0.9680 1.0330 0.6306 1.4860
## as.factor(GRADES)13 0.8744 1.1436 0.5421 1.4104
## as.factor(GRADES)14 0.8050 1.2423 0.5010 1.2935
## as.factor(GRADES)15 0.8369 1.1949 0.4910 1.4265
## as.factor(GRADES)16 0.8452 1.1832 0.5307 1.3461
## as.factor(GRADES)17 0.5417 1.8461 0.3330 0.8811
## as.factor(MARRY)3 2.0418 0.4898 1.8074 2.3065
## as.factor(MARRY)4 1.0653 0.9387 0.8723 1.3009
## as.factor(MARRY)5 1.0330 0.9680 0.7778 1.3721
## as.factor(MARRY)6 1.1712 0.8538 0.9694 1.4150
## as.factor(MARRY)8 2.1116 0.4736 1.0919 4.0838
## BMI 0.9869 1.0132 0.9779 0.9961
## AVGSMK 1.0050 0.9950 1.0018 1.0082
## SIZE 0.9775 1.0230 0.9613 0.9939
## as.factor(BOOZE_q)0–0.5/week:SEX 1.0197 0.9806 0.7591 1.3699
## as.factor(BOOZE_q)0.5–2/week:SEX 1.0433 0.9585 0.8066 1.3496
## as.factor(BOOZE_q)>2/week:SEX 1.2014 0.8324 0.9486 1.5216
##
## Concordance= 0.661 (se = 0.006 )
## Likelihood ratio test= 653.3 on 34 df, p=<2e-16
## Wald test = 658.5 on 34 df, p=<2e-16
## Score (logrank) test = 694.2 on 34 df, p=<2e-16
cox.zph(cox_product)
## chisq df p
## as.factor(BOOZE_q) 7.669 3 0.05337
## SEX 11.045 1 0.00089
## as.factor(RACE) 1.948 2 0.37762
## as.factor(GRADES) 20.607 17 0.24437
## as.factor(MARRY) 4.475 5 0.48325
## BMI 0.172 1 0.67826
## AVGSMK 0.472 1 0.49207
## SIZE 0.253 1 0.61527
## as.factor(BOOZE_q):SEX 12.584 3 0.00563
## GLOBAL 47.959 34 0.05671
plot(cox.zph(cox_product))








